@@ -645,6 +645,147 @@ try { // whole process is placed in a try/catch so we can log uncaught errors
645
645
return {
646
646
hrefs_without_special_scheme
647
647
} ;
648
+ } ) ( ) ,
649
+
650
+ 'dom-shape' : ( ( ) => {
651
+
652
+ //================================================================
653
+ // helpers
654
+ //================================================================
655
+
656
/**
 * Counts how many `parentNode` hops separate a node from `document.body`.
 *
 * The walk assumes `document.body` is reachable through the `parentNode`
 * chain; if the chain ends first, the property access on the missing parent
 * throws a TypeError.
 *
 * @param {Node} node - Node whose distance from the body is measured.
 * @param {number} [currentDepth=0] - Starting value added to the hop count.
 * @returns {number} `currentDepth` plus the number of hops taken
 *   (`document.body` itself yields `currentDepth`).
 */
function depthOf(node, currentDepth = 0) {
  let cursor = node;
  let hops = currentDepth;
  // Climb one ancestor per pass until the body element is reached.
  while (cursor !== document.body) {
    cursor = cursor.parentNode;
    hops = hops + 1;
  }
  return hops;
}
663
+
664
/**
 * Arithmetic mean of an array of numbers.
 *
 * @param {number[]} arr - Values to average.
 * @returns {number} Sum of the values divided by `arr.length`;
 *   NaN for an empty array (0 / 0).
 */
function mean(arr) {
  let sum = 0;
  arr.forEach((value) => {
    sum = sum + value;
  });
  return sum / arr.length;
}
668
+
669
/**
 * Standard deviation of `arr` around a caller-supplied mean, dividing by
 * `arr.length` (not `arr.length - 1`).
 *
 * @param {number[]} arr - Values to measure.
 * @param {number} mean - Centre the deviations are measured from.
 * @returns {number} Square root of the average squared deviation;
 *   NaN for an empty array.
 */
function stddev(arr, mean) {
  const sumOfSquares = arr.reduce((running, value) => {
    const deviation = value - mean;
    return running + deviation ** 2;
  }, 0);
  const variance = sumOfSquares / arr.length;
  return Math.sqrt(variance);
}
674
+
675
/**
 * Mode-based skewness: the distance between mean and mode expressed in
 * standard deviations.
 *
 * @param {number} mean - Mean of the distribution.
 * @param {number} mode - Mode of the distribution.
 * @param {number} stdDev - Standard deviation of the distribution.
 * @returns {number} `(mean - mode) / stdDev`. A zero `stdDev` is not
 *   special-cased, so the result is then NaN or +/-Infinity.
 */
function skew(mean, mode, stdDev) {
  const offsetFromMode = mean - mode;
  return offsetFromMode / stdDev;
}
678
+
679
/**
 * Median of an array that the caller has already sorted in ascending order.
 *
 * @param {number[]} sortedArr - Sorted values; the function does not sort.
 * @returns {number} The middle element for an odd length, or the average of
 *   the two middle elements for an even length (NaN for an empty array).
 */
function median(sortedArr) {
  const size = sortedArr.length;
  const isOdd = size % 2 !== 0;
  if (isOdd) {
    return sortedArr[(size - 1) / 2];
  }
  const upperMiddle = size / 2;
  return (sortedArr[upperMiddle - 1] + sortedArr[upperMiddle]) / 2;
}
687
+
688
/**
 * Most frequent value in an array.
 *
 * Occurrences are tallied in a Map rather than a plain object, so values
 * whose string form matches an Object.prototype member (e.g. "constructor")
 * are counted correctly.
 *
 * @param {Array} arr - Values to inspect.
 * @returns {*} The value with the highest occurrence count. On a tie, the
 *   value that reached that count first while scanning left to right wins.
 *   Returns 0 for an empty array.
 */
function mode(arr) {
  const counts = new Map();
  let mostFrequent = 0;
  let highestCount = 0;
  for (const item of arr) {
    const seen = (counts.get(item) ?? 0) + 1;
    counts.set(item, seen);
    // Strict comparison keeps the earliest value to reach the top count.
    if (seen > highestCount) {
      mostFrequent = item;
      highestCount = seen;
    }
  }
  return mostFrequent;
}
704
+
705
/**
 * Extremes of an array that the caller has already sorted in ascending order.
 *
 * @param {Array} sortedArr - Sorted values; the function does not sort.
 * @returns {{min: *, max: *}} First and last elements of the array
 *   (both `undefined` when the array is empty).
 */
function range(sortedArr) {
  const lastIndex = sortedArr.length - 1;
  const min = sortedArr[0];
  const max = sortedArr[lastIndex];
  return { min, max };
}
708
+
709
+ let stats = { } ;
710
+
711
+ stats . nodesCount = document . querySelector ( "*" ) . length ;
712
+
713
+ stats . bodyNodesCount = document . querySelectorAll ( "body *" ) . length
714
+ let bodyLeafNodes = document . querySelectorAll ( "body :not(:has(*))" ) ;
715
+ stats . bodyLeafNodesCount = bodyLeafNodes . length
716
+
717
+ let depths = [ ] ;
718
+ for ( const leafNode of bodyLeafNodes ) {
719
+ depths . push ( depthOf ( leafNode ) ) ;
720
+ }
721
+ depths . sort ( ( a , b ) => a - b ) ;
722
+
723
+ let meanDepth = mean ( depths ) ;
724
+ let stdDevDepth = stddev ( depths , meanDepth ) ;
725
+ let modeDepth = mode ( depths ) ;
726
+ let skewDepth = skew ( meanDepth , modeDepth , stdDevDepth ) ;
727
+ stats . depth = {
728
+ mean : meanDepth ,
729
+ median : median ( depths ) ,
730
+ mode : modeDepth ,
731
+ stddev : stdDevDepth ,
732
+ skew : skewDepth ,
733
+ range : range ( depths ) ,
734
+ }
735
+
736
+ let bodyNonLeafNodes = document . querySelectorAll ( "body :has(*)" ) ;
737
+ stats . bodyNonLeafNodesCount = bodyNonLeafNodes . length
738
+ let branchFactors = [ ] ;
739
+ for ( const nonLeafNode of bodyNonLeafNodes ) {
740
+ branchFactors . push ( nonLeafNode . childNodes . length ) ;
741
+ }
742
+ branchFactors . sort ( ( a , b ) => a - b ) ;
743
+
744
+ let meanBranchFactor = mean ( branchFactors ) ;
745
+ let stdDevBranchFactor = stddev ( branchFactors , meanBranchFactor ) ;
746
+ let modeBranchFactor = mode ( branchFactors ) ;
747
+ let skewBranchFactor = skew ( meanBranchFactor , modeBranchFactor , stdDevBranchFactor ) ;
748
+ stats . branchFactor = {
749
+ mean : meanBranchFactor ,
750
+ median : median ( branchFactors ) ,
751
+ mode : modeBranchFactor ,
752
+ stddev : stdDevBranchFactor ,
753
+ skew : skewBranchFactor ,
754
+ range : range ( branchFactors ) ,
755
+ }
756
+
757
+ stats . branchFactorByDepth = [ ]
758
+ for ( let depth = 0 ; depth < stats . depth . range . max ; depth ++ ) {
759
+
760
+ let selector = "body " ;
761
+ for ( let i = 0 ; i < depth ; i ++ ) {
762
+ selector += " > :has(*)" ;
763
+ }
764
+ let parentNodesAtThisLevel = document . querySelectorAll ( selector ) ;
765
+ let branchFactors = [ ] ;
766
+ for ( const nonLeafNode of parentNodesAtThisLevel ) {
767
+ branchFactors . push ( nonLeafNode . childNodes . length ) ;
768
+ }
769
+ branchFactors . sort ( ( a , b ) => a - b ) ;
770
+
771
+ let meanBranchFactor = mean ( branchFactors ) ;
772
+ let stdDevBranchFactor = stddev ( branchFactors , meanBranchFactor ) ;
773
+ let modeBranchFactor = mode ( branchFactors ) ;
774
+ let skewBranchFactor = skew ( meanBranchFactor , modeBranchFactor , stdDevBranchFactor ) ;
775
+ stats . branchFactorByDepth [ depth ] = {
776
+ depth : depth ,
777
+ count : parentNodesAtThisLevel . length ,
778
+ mean : meanBranchFactor ,
779
+ stddev : stdDevBranchFactor ,
780
+ skew : skewBranchFactor ,
781
+ median : median ( branchFactors ) ,
782
+ mode : modeBranchFactor ,
783
+ range : range ( branchFactors ) ,
784
+ }
785
+ }
786
+
787
+ return stats ;
788
+
648
789
} ) ( )
649
790
} ;
650
791
}
0 commit comments