-
Notifications
You must be signed in to change notification settings - Fork 37
/
Copy pathstring_buffer.c
141 lines (122 loc) · 4.34 KB
/
string_buffer.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
// Copyright 2010 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: [email protected] (Jonathan Tang)
#include "string_buffer.h"
#include <assert.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include "string_piece.h"
#include "util.h"
// Initial capacity handed to every buffer by gumbo_string_buffer_init.
// gumbo_string_buffer_clear also uses 8x this value as the threshold above
// which it frees the storage and starts over at this size.
//
// Size chosen via statistical analysis of ~60K websites.
// 99% of text nodes and 98% of attribute names/values fit in this initial size.
// NOTE(review): the statistic above is hard to reconcile with a value of 5;
// confirm that the constant and the comment still describe the same thing.
static const size_t kDefaultStringBufferSize = 5;
// Ensures that buffer can hold buffer->length + additional_chars bytes,
// reallocating buffer->data if the current capacity is too small.  Capacity
// grows by repeated doubling.  buffer->length is never modified.
//
// additional_chars: number of bytes the caller is about to append.
// buffer: the buffer to grow; its capacity and data fields are updated only
//     when a larger allocation is required.
static void maybe_resize_string_buffer(
    size_t additional_chars, GumboStringBuffer* buffer) {
  // Largest capacity that can still be doubled without wrapping size_t.
  const size_t max_doublable = ((size_t) -1) / 2;
  // This sum is intentionally allowed to wrap: gumbo_string_buffer_reserve
  // passes (min_capacity - length), which wraps when min_capacity < length
  // and is cancelled out again here.
  size_t new_length = buffer->length + additional_chars;
  size_t new_capacity = buffer->capacity;
  if (new_capacity == 0 && new_length > 0) {
    // Doubling zero never makes progress, so the loop below would spin
    // forever on a buffer whose capacity is 0.  Seed it with 1 instead.
    new_capacity = 1;
  }
  while (new_capacity < new_length) {
    if (new_capacity > max_doublable) {
      // Another doubling would wrap around (eventually reaching 0 and looping
      // forever).  Fall back to requesting exactly what is needed.
      new_capacity = new_length;
      break;
    }
    new_capacity *= 2;
  }
  if (new_capacity != buffer->capacity) {
    buffer->capacity = new_capacity;
    buffer->data = gumbo_realloc(buffer->data, buffer->capacity);
  }
}
// Sets up *output as an empty buffer backed by a fresh allocation of
// kDefaultStringBufferSize bytes.  Any previous contents of *output are
// overwritten without being freed.
void gumbo_string_buffer_init(GumboStringBuffer* output) {
  char* storage = gumbo_malloc(kDefaultStringBufferSize);
  output->data = storage;
  output->capacity = kDefaultStringBufferSize;
  // Nothing has been written yet.
  output->length = 0;
}
// Grows output, if needed, so that its capacity is at least min_capacity.
// The stored length and contents are not changed.
void gumbo_string_buffer_reserve(
    size_t min_capacity, GumboStringBuffer* output) {
  // The resize helper expects a byte count relative to the current length and
  // adds the length back itself.  When min_capacity is smaller than the
  // length this unsigned subtraction wraps, and the helper's addition wraps it
  // back to min_capacity, so no growth is requested in that case.
  const size_t extra_needed = min_capacity - output->length;
  maybe_resize_string_buffer(extra_needed, output);
}
// Appends the code point c to output as a UTF-8 style byte sequence of one to
// four bytes, growing the buffer first if necessary.  No validation of c is
// performed here.
void gumbo_string_buffer_append_codepoint(int c, GumboStringBuffer* output) {
  // continuation_bytes counts only the trailing bytes, i.e. one less than the
  // total sequence length; lead_marker holds the high bits of the first byte.
  int continuation_bytes;
  int lead_marker;
  if (c > 0xffff) {
    continuation_bytes = 3;
    lead_marker = 0xf0;
  } else if (c > 0x7ff) {
    continuation_bytes = 2;
    lead_marker = 0xe0;
  } else if (c > 0x7f) {
    continuation_bytes = 1;
    lead_marker = 0xc0;
  } else {
    continuation_bytes = 0;
    lead_marker = 0;
  }

  maybe_resize_string_buffer(continuation_bytes + 1, output);

  // The lead byte carries the bits above all the continuation payloads; each
  // following byte then carries the next lower group of six bits.
  int shift = continuation_bytes * 6;
  output->data[output->length++] = lead_marker | (c >> shift);
  while (shift > 0) {
    shift -= 6;
    output->data[output->length++] = 0x80 | (0x3f & (c >> shift));
  }
}
// Appends length bytes starting at data to the end of buffer, growing the
// buffer first if necessary.
//
// buffer: destination buffer; its length increases by `length`.
// data: source bytes; must not overlap the buffer's own storage (memcpy is
//     used).  May be NULL only when length is 0.
// length: number of bytes to copy.
void gumbo_string_buffer_put(
    GumboStringBuffer* buffer, const char* data, size_t length) {
  if (length == 0) {
    // Nothing to copy.  Returning here also avoids handing memcpy a possibly
    // NULL source pointer, which is undefined behavior even for zero bytes.
    return;
  }
  maybe_resize_string_buffer(length, buffer);
  memcpy(buffer->data + buffer->length, data, length);
  buffer->length += length;
}
// Appends `count` NUL-terminated strings, passed as variadic const char*
// arguments, to buffer in order.  The argument list is walked twice: once to
// total the lengths so the buffer is resized a single time, and once to copy.
void gumbo_string_buffer_putv(GumboStringBuffer* buffer, int count, ...) {
  va_list args;
  size_t combined_length = 0;
  int remaining;

  // Pass 1: measure.
  va_start(args, count);
  for (remaining = count; remaining > 0; --remaining) {
    const char* piece = va_arg(args, const char*);
    combined_length += strlen(piece);
  }
  va_end(args);

  maybe_resize_string_buffer(combined_length, buffer);

  // Pass 2: copy each piece onto the end of the buffer.
  va_start(args, count);
  for (remaining = count; remaining > 0; --remaining) {
    const char* piece = va_arg(args, const char*);
    const size_t piece_length = strlen(piece);
    memcpy(buffer->data + buffer->length, piece, piece_length);
    buffer->length += piece_length;
  }
  va_end(args);
}
// Appends the bytes described by str (its data pointer and length) to output
// by delegating to gumbo_string_buffer_put.
void gumbo_string_buffer_append_string(
    GumboStringPiece* str, GumboStringBuffer* output) {
  const char* bytes = str->data;
  const size_t byte_count = str->length;
  gumbo_string_buffer_put(output, bytes, byte_count);
}
// Returns the buffer's contents as a NUL-terminated string.  The returned
// pointer is the buffer's own storage, not a copy.  The terminator is written
// just past the current contents and is not counted in buffer->length.
const char* gumbo_string_buffer_cstr(GumboStringBuffer* buffer) {
  // Make sure there is room for the terminator; this may move buffer->data,
  // so the pointer is read only afterwards.
  maybe_resize_string_buffer(1, buffer);
  char* text = buffer->data;
  text[buffer->length] = 0;
  return text;
}
// Returns a newly allocated, NUL-terminated copy of the buffer's contents.
// The copy is obtained from gumbo_malloc and is independent of input, which is
// left unchanged.
char* gumbo_string_buffer_to_string(GumboStringBuffer* input) {
  const size_t text_length = input->length;
  // One extra byte for the terminator.
  char* copy = gumbo_malloc(text_length + 1);
  memcpy(copy, input->data, text_length);
  copy[text_length] = '\0';
  return copy;
}
// Empties the buffer so it can be reused.  Small buffers keep their storage;
// buffers that have grown past 8x the default size are freed and
// re-initialized at the default size.
void gumbo_string_buffer_clear(GumboStringBuffer* input) {
  const size_t shrink_threshold = kDefaultStringBufferSize * 8;
  input->length = 0;
  if (input->capacity <= shrink_threshold) {
    return;
  }
  // Merely resetting the length would let the buffer grow without bound and
  // tie up memory that may be needed for parsing the rest of the document, so
  // once it's grown by more than 3 doublings we free it and start over.
  gumbo_string_buffer_destroy(input);
  gumbo_string_buffer_init(input);
}
// Releases the storage owned by buffer.  The GumboStringBuffer struct itself
// is not freed.  After this call the buffer must be re-initialized with
// gumbo_string_buffer_init before it is used again.
void gumbo_string_buffer_destroy(GumboStringBuffer* buffer) {
  gumbo_free(buffer->data);
  // Do not leave a dangling pointer behind in the caller's struct: an
  // accidental reuse then dereferences NULL instead of freed memory.
  // NOTE(review): a repeated destroy now passes NULL to gumbo_free, which is
  // harmless only if gumbo_free follows free() semantics -- confirm in util.h.
  buffer->data = NULL;
}