Add StringOutputBuffer class.
This CL adds a new helper class used to store very large string
output efficiently in memory, while allowing it to be written to
a file, or compared with an existing file's content.
This will be used in future CLs that will speed up and reduce the
RAM usage of some operations like the ones handling --ide=json or
-export-compile-command.
+ Add FileWriter class to encapsulate the Windows-specific
creation logic required to work-around limitations of
base::WriteFile()
Change-Id: I87f1ba716e066769ca7c1dc28a46b1460022f1dd
Reviewed-on: https://gn-review.googlesource.com/c/gn/+/7940
Commit-Queue: Scott Graham <scottmg@chromium.org>
Reviewed-by: Scott Graham <scottmg@chromium.org>
diff --git a/build/gen.py b/build/gen.py
index 44d32c8..582562c 100755
--- a/build/gen.py
+++ b/build/gen.py
@@ -485,6 +485,7 @@
'src/gn/escape.cc',
'src/gn/exec_process.cc',
'src/gn/filesystem_utils.cc',
+ 'src/gn/file_writer.cc',
'src/gn/frameworks_utils.cc',
'src/gn/function_exec_script.cc',
'src/gn/function_filter.cc',
@@ -561,6 +562,7 @@
'src/gn/source_file.cc',
'src/gn/standard_out.cc',
'src/gn/string_atom.cc',
+ 'src/gn/string_output_buffer.cc',
'src/gn/string_utils.cc',
'src/gn/substitution_list.cc',
'src/gn/substitution_pattern.cc',
@@ -609,6 +611,7 @@
'src/gn/escape_unittest.cc',
'src/gn/exec_process_unittest.cc',
'src/gn/filesystem_utils_unittest.cc',
+ 'src/gn/file_writer_unittest.cc',
'src/gn/frameworks_utils_unittest.cc',
'src/gn/function_filter_unittest.cc',
'src/gn/function_foreach_unittest.cc',
@@ -661,6 +664,7 @@
'src/gn/source_dir_unittest.cc',
'src/gn/source_file_unittest.cc',
'src/gn/string_atom_unittest.cc',
+ 'src/gn/string_output_buffer_unittest.cc',
'src/gn/string_utils_unittest.cc',
'src/gn/substitution_pattern_unittest.cc',
'src/gn/substitution_writer_unittest.cc',
diff --git a/src/gn/file_writer.cc b/src/gn/file_writer.cc
new file mode 100644
index 0000000..a4bb5be
--- /dev/null
+++ b/src/gn/file_writer.cc
@@ -0,0 +1,108 @@
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "gn/file_writer.h"
+
+#include "base/files/file_path.h"
+#include "base/logging.h"
+#include "gn/filesystem_utils.h"
+
+#if defined(OS_WIN)
+#include <windows.h>
+#include "base/strings/utf_string_conversions.h"
+#else
+#include <fcntl.h>
+#include <unistd.h>
+#include "base/posix/eintr_wrapper.h"
+#endif
+
+FileWriter::~FileWriter() = default;
+
+#if defined(OS_WIN)
+
+bool FileWriter::Create(const base::FilePath& file_path) {
+ // On Windows, provide a custom implementation of base::WriteFile. Sometimes
+ // the base version fails, especially on the bots. The guess is that Windows
+ // Defender or other antivirus programs still have the file open (after
+ // checking for the read) when the write happens immediately after. This
+ // version opens with FILE_SHARE_READ (normally not what you want when
+ // replacing the entire contents of the file) which lets us continue even if
+ // another program has the file open for reading. See
+ // http://crbug.com/468437
+ file_path_ = base::UTF16ToUTF8(file_path.value());
+ file_ = base::win::ScopedHandle(::CreateFile(
+ reinterpret_cast<LPCWSTR>(file_path.value().c_str()), GENERIC_WRITE,
+ FILE_SHARE_READ, NULL, CREATE_ALWAYS, 0, NULL));
+
+ valid_ = file_.IsValid();
+ if (!valid_) {
+ PLOG(ERROR) << "CreateFile failed for path " << file_path_;
+ }
+ return valid_;
+}
+
+bool FileWriter::Write(std::string_view str) {
+ if (!valid_)
+ return false;
+
+ DWORD written;
+ BOOL result =
+ ::WriteFile(file_.Get(), str.data(), str.size(), &written, nullptr);
+ if (!result) {
+ PLOG(ERROR) << "writing file " << file_path_ << " failed";
+ valid_ = false;
+ return false;
+ }
+ if (static_cast<size_t>(written) != str.size()) {
+ PLOG(ERROR) << "wrote " << written << " bytes to "
+ << file_path_ << " expected " << str.size();
+ valid_ = false;
+ return false;
+ }
+ return true;
+}
+
+bool FileWriter::Close() {
+ // NOTE: file_.Close() is not used here because it cannot return an error.
+ HANDLE handle = file_.Take();
+ if (handle && !::CloseHandle(handle))
+ return false;
+
+ return valid_;
+}
+
+#else // !OS_WIN
+
+bool FileWriter::Create(const base::FilePath& file_path) {
+ fd_.reset(HANDLE_EINTR(::creat(file_path.value().c_str(), 0666)));
+ valid_ = fd_.is_valid();
+ if (!valid_) {
+ PLOG(ERROR) << "creat() failed for path " << file_path.value();
+ }
+ return valid_;
+}
+
+bool FileWriter::Write(std::string_view str) {
+ if (!valid_)
+ return false;
+
+ while (!str.empty()) {
+ ssize_t written = HANDLE_EINTR(::write(fd_.get(), str.data(), str.size()));
+ if (written <= 0) {
+ valid_ = false;
+ return false;
+ }
+ str.remove_prefix(static_cast<size_t>(written));
+ }
+ return true;
+}
+
+bool FileWriter::Close() {
+ // The ScopedFD reset() method will crash on EBADF and ignore other errors
+ // intentionally, so no need to check anything here.
+ fd_.reset();
+ return valid_;
+}
+
+#endif // !OS_WIN
diff --git a/src/gn/file_writer.h b/src/gn/file_writer.h
new file mode 100644
index 0000000..0ae5061
--- /dev/null
+++ b/src/gn/file_writer.h
@@ -0,0 +1,74 @@
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef TOOLS_GN_FILE_WRITER_H_
+#define TOOLS_GN_FILE_WRITER_H_
+
+#include "util/build_config.h"
+
+#if defined(OS_WIN)
+#include "base/win/scoped_handle.h"
+#else
+#include "base/files/scoped_file.h"
+#endif
+
+#include <string>
+
+namespace base {
+class FilePath;
+}
+
+// Convenience class to write data to a file. This is used to work around two
+// limitations of base::WriteFile, i.e.:
+//
+// - base::WriteFile() doesn't allow multiple writes to the target file.
+//
+//
+// - Windows-specific issues created by anti-virus programs required opening
+// the file differently (see http://crbug.com/468437).
+//
+// Usage is:
+// 1) Create instance.
+// 2) Call Create() to create the file.
+// 3) Call Write() one or more times to write data to it.
+// 4) Call Close(), or the destructor, to close the file.
+//
+// As soon as one method fails, all other calls will return false, this allows
+// simplified error checking as in:
+//
+// FileWriter writer;
+// writer.Create(<some_path>);
+// writer.Write(<some_data>);
+// writer.Write(<some_more_data>);
+// if (!writer.Close()) {
+// ... error happened in one of the above calls.
+// }
+//
+class FileWriter {
+ public:
+ FileWriter() = default;
+ ~FileWriter();
+
+ // Create output file. Return true on success, false otherwise.
+ bool Create(const base::FilePath& file_path);
+
+ // Append |data| to the output file. Return true on success, or false on
+ // failure or if any previous Create() or Write() call failed.
+ bool Write(std::string_view data);
+
+ // Close the file. Return true on success, or false on failure or if
+ // any previous Create() or Write() call failed.
+ bool Close();
+
+ private:
+#if defined(OS_WIN)
+ base::win::ScopedHandle file_;
+ std::string file_path_;
+#else
+ base::ScopedFD fd_;
+#endif
+ bool valid_ = true;
+};
+
+#endif // TOOLS_GN_FILE_WRITER_H_
diff --git a/src/gn/file_writer_unittest.cc b/src/gn/file_writer_unittest.cc
new file mode 100644
index 0000000..49f533a
--- /dev/null
+++ b/src/gn/file_writer_unittest.cc
@@ -0,0 +1,44 @@
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "gn/file_writer.h"
+
+#include "base/files/file_path.h"
+#include "base/files/scoped_temp_dir.h"
+#include "gn/filesystem_utils.h"
+
+#include "util/test/test.h"
+
+TEST(FileWriter, SingleWrite) {
+ base::ScopedTempDir temp_dir;
+ ASSERT_TRUE(temp_dir.CreateUniqueTempDir());
+
+ std::string data = "foo";
+
+ base::FilePath file_path = temp_dir.GetPath().AppendASCII("foo.txt");
+
+ FileWriter writer;
+ EXPECT_TRUE(writer.Create(file_path));
+ EXPECT_TRUE(writer.Write(data));
+ EXPECT_TRUE(writer.Close());
+
+ EXPECT_TRUE(ContentsEqual(file_path, data));
+}
+
+TEST(FileWriter, MultipleWrites) {
+ base::ScopedTempDir temp_dir;
+ ASSERT_TRUE(temp_dir.CreateUniqueTempDir());
+
+ std::string data = "Hello World!";
+
+ base::FilePath file_path = temp_dir.GetPath().AppendASCII("foo.txt");
+
+ FileWriter writer;
+ EXPECT_TRUE(writer.Create(file_path));
+ EXPECT_TRUE(writer.Write("Hello "));
+ EXPECT_TRUE(writer.Write("World!"));
+ EXPECT_TRUE(writer.Close());
+
+ EXPECT_TRUE(ContentsEqual(file_path, data));
+}
diff --git a/src/gn/filesystem_utils.cc b/src/gn/filesystem_utils.cc
index fee2596..bc95dce 100644
--- a/src/gn/filesystem_utils.cc
+++ b/src/gn/filesystem_utils.cc
@@ -7,9 +7,9 @@
#include <algorithm>
#include "base/files/file_util.h"
-#include "base/logging.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
+#include "gn/file_writer.h"
#include "gn/location.h"
#include "gn/settings.h"
#include "gn/source_dir.h"
@@ -969,44 +969,10 @@
return false;
}
- int size = static_cast<int>(data.size());
- bool write_success = false;
-
-#if defined(OS_WIN)
- // On Windows, provide a custom implementation of base::WriteFile. Sometimes
- // the base version fails, especially on the bots. The guess is that Windows
- // Defender or other antivirus programs still have the file open (after
- // checking for the read) when the write happens immediately after. This
- // version opens with FILE_SHARE_READ (normally not what you want when
- // replacing the entire contents of the file) which lets us continue even if
- // another program has the file open for reading. See http://crbug.com/468437
- base::win::ScopedHandle file(::CreateFile(
- reinterpret_cast<LPCWSTR>(file_path.value().c_str()), GENERIC_WRITE,
- FILE_SHARE_READ, NULL, CREATE_ALWAYS, 0, NULL));
- if (file.IsValid()) {
- DWORD written;
- BOOL result = ::WriteFile(file.Get(), data.c_str(), size, &written, NULL);
- if (result) {
- if (static_cast<int>(written) == size) {
- write_success = true;
- } else {
- // Didn't write all the bytes.
- LOG(ERROR) << "wrote" << written << " bytes to "
- << base::UTF16ToUTF8(file_path.value()) << " expected "
- << size;
- }
- } else {
- // WriteFile failed.
- PLOG(ERROR) << "writing file " << base::UTF16ToUTF8(file_path.value())
- << " failed";
- }
- } else {
- PLOG(ERROR) << "CreateFile failed for path "
- << base::UTF16ToUTF8(file_path.value());
- }
-#else
- write_success = base::WriteFile(file_path, data.c_str(), size) == size;
-#endif
+ FileWriter writer;
+ writer.Create(file_path);
+ writer.Write(data);
+ bool write_success = writer.Close();
if (!write_success && err) {
*err = Err(Location(), "Unable to write file.",
diff --git a/src/gn/string_output_buffer.cc b/src/gn/string_output_buffer.cc
new file mode 100644
index 0000000..09913ce
--- /dev/null
+++ b/src/gn/string_output_buffer.cc
@@ -0,0 +1,117 @@
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "gn/string_output_buffer.h"
+
+#include "base/files/file_path.h"
+#include "base/files/file_util.h"
+#include "gn/err.h"
+#include "gn/file_writer.h"
+#include "gn/filesystem_utils.h"
+
+#include <fstream>
+
+std::string StringOutputBuffer::str() const {
+ std::string result;
+ size_t data_size = size();
+ result.reserve(data_size);
+ for (size_t nn = 0; nn < pages_.size(); ++nn) {
+ size_t wanted_size = std::min(kPageSize, data_size - nn * kPageSize);
+ result.append(pages_[nn]->data(), wanted_size);
+ }
+ return result;
+}
+
+void StringOutputBuffer::Append(const char* str, size_t len) {
+ Append(std::string_view(str, len));
+}
+
+void StringOutputBuffer::Append(std::string_view str) {
+ while (str.size() > 0) {
+ if (page_free_size() == 0) {
+ // Allocate a new page.
+ pages_.push_back(std::make_unique<Page>());
+ pos_ = 0;
+ }
+ size_t size = std::min(page_free_size(), str.size());
+ memcpy(pages_.back()->data() + pos_, str.data(), size);
+ pos_ += size;
+ str.remove_prefix(size);
+ }
+}
+
+void StringOutputBuffer::Append(char c) {
+ if (page_free_size() == 0) {
+ // Allocate a new page.
+ pages_.push_back(std::make_unique<Page>());
+ pos_ = 0;
+ }
+ pages_.back()->data()[pos_] = c;
+ pos_ += 1;
+}
+
+bool StringOutputBuffer::ContentsEqual(const base::FilePath& file_path) const {
+ // Compare file and stream sizes first. Quick and will save us some time if
+ // they are different sizes.
+ size_t data_size = size();
+ int64_t file_size;
+ if (!base::GetFileSize(file_path, &file_size) ||
+ static_cast<size_t>(file_size) != data_size) {
+ return false;
+ }
+
+ // Open the file in binary mode.
+ std::ifstream file(file_path.As8Bit().c_str(), std::ios::binary);
+ if (!file.is_open())
+ return false;
+
+ size_t page_count = pages_.size();
+ Page file_page;
+ for (size_t nn = 0; nn < page_count; ++nn) {
+ size_t wanted_size = std::min(data_size - nn * kPageSize, kPageSize);
+ file.read(file_page.data(), wanted_size);
+ if (!file.good())
+ return false;
+
+ if (memcmp(file_page.data(), pages_[nn]->data(), wanted_size) != 0)
+ return false;
+ }
+ return true;
+}
+
+// Write the contents of this instance to a file at |file_path|.
+bool StringOutputBuffer::WriteToFile(const base::FilePath& file_path,
+ Err* err) const {
+ // Create the directory if necessary.
+ if (!base::CreateDirectory(file_path.DirName())) {
+ if (err) {
+ *err =
+ Err(Location(), "Unable to create directory.",
+ "I was using \"" + FilePathToUTF8(file_path.DirName()) + "\".");
+ }
+ return false;
+ }
+
+ size_t data_size = size();
+ size_t page_count = pages_.size();
+
+ FileWriter writer;
+ bool success = writer.Create(file_path);
+ if (success) {
+ for (size_t nn = 0; nn < page_count; ++nn) {
+ size_t wanted_size = std::min(data_size - nn * kPageSize, kPageSize);
+ success = writer.Write(std::string_view(pages_[nn]->data(), wanted_size));
+ if (!success)
+ break;
+ }
+ }
+ if (!writer.Close())
+ success = false;
+
+ if (!success && err) {
+ *err = Err(Location(), "Unable to write file.",
+ "I was writing \"" + FilePathToUTF8(file_path) + "\".");
+ }
+ return success;
+}
diff --git a/src/gn/string_output_buffer.h b/src/gn/string_output_buffer.h
new file mode 100644
index 0000000..b42b638
--- /dev/null
+++ b/src/gn/string_output_buffer.h
@@ -0,0 +1,93 @@
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef TOOLS_GN_STRING_OUTPUT_BUFFER_H_
+#define TOOLS_GN_STRING_OUTPUT_BUFFER_H_
+
+#include <array>
+#include <streambuf>
+#include <string>
+#include <vector>
+
+namespace base {
+class FilePath;
+} // namespace base
+
+class Err;
+
+// An append-only very large storage area for string data. Useful for the parts
+// of GN that need to generate huge output files (e.g. --ide=json will create
+// a 139 MiB project.json file for the Fuchsia build).
+//
+// Usage is the following:
+//
+// 1) Create instance.
+//
+// 2) Use operator<<, or Append() to append data to the instance.
+//
+// 3) Alternatively, create an std::ostream that takes its address as
+// argument, then use the output stream as usual to append data to it.
+//
+// StringOutputBuffer storage;
+// std::ostream out(&storage);
+// out << "Hello world!";
+//
+// 4) Use ContentsEqual() to compare the instance's content with that of a
+// given file.
+//
+// 5) Use WriteToFile() to write the content to a given file.
+//
+class StringOutputBuffer : public std::streambuf {
+ public:
+ StringOutputBuffer() = default;
+
+ // Convert content to single std::string instance. Useful for unit-testing.
+ std::string str() const;
+
+ // Return the number of characters stored in this instance.
+ size_t size() const { return (pages_.size() - 1u) * kPageSize + pos_; }
+
+ // Append string to this instance.
+ void Append(const char* str, size_t len);
+ void Append(std::string_view str);
+ void Append(char c);
+
+ StringOutputBuffer& operator<<(std::string_view str) {
+ Append(str);
+ return *this;
+ }
+
+ // Compare the content of this instance with that of the file at |file_path|.
+ bool ContentsEqual(const base::FilePath& file_path) const;
+
+ // Write the contents of this instance to a file at |file_path|.
+ bool WriteToFile(const base::FilePath& file_path, Err* err) const;
+
+ static size_t GetPageSizeForTesting() { return kPageSize; }
+
+ protected:
+ // Called by std::ostream to write |n| chars from |s|.
+ std::streamsize xsputn(const char* s, std::streamsize n) override {
+ Append(s, static_cast<size_t>(n));
+ return n;
+ }
+
+ // Called by std::ostream to write a single character.
+ int_type overflow(int_type ch) override {
+ Append(static_cast<char>(ch));
+ return 1;
+ }
+
+ private:
+ // Return the number of free bytes in the current page.
+ size_t page_free_size() const { return kPageSize - pos_; }
+
+ static constexpr size_t kPageSize = 65536;
+ using Page = std::array<char, kPageSize>;
+
+ size_t pos_ = kPageSize;
+ std::vector<std::unique_ptr<Page>> pages_;
+};
+
+#endif // TOOLS_GN_STRING_OUTPUT_BUFFER_H_
diff --git a/src/gn/string_output_buffer_unittest.cc b/src/gn/string_output_buffer_unittest.cc
new file mode 100644
index 0000000..6dcb741
--- /dev/null
+++ b/src/gn/string_output_buffer_unittest.cc
@@ -0,0 +1,134 @@
+// Copyright 2020 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "gn/string_output_buffer.h"
+
+#include "base/files/file_path.h"
+#include "base/files/file_util.h"
+#include "base/files/scoped_temp_dir.h"
+#include "util/test/test.h"
+
+namespace {
+
+// Create a test string of |size| characters with pseudo-random ASCII content.
+std::string CreateTestString(size_t size, size_t seed = 0) {
+ std::string result;
+ result.resize(size);
+ for (size_t n = 0; n < size; ++n) {
+ int offset = (size + seed + n * 1337);
+ char ch = ' ' + offset % (127 - 32);
+ result[n] = ch;
+ }
+ return result;
+}
+
+} // namespace
+
+TEST(StringOutputBuffer, Append) {
+ const size_t data_size = 100000;
+ std::string data = CreateTestString(data_size);
+
+ const size_t num_spans = 50;
+ const size_t span_size = data_size / num_spans;
+
+ StringOutputBuffer buffer;
+
+ for (size_t n = 0; n < num_spans; ++n) {
+ size_t start_offset = n * span_size;
+ size_t end_offset = std::min(start_offset + span_size, data.size());
+ buffer.Append(&data[start_offset], end_offset - start_offset);
+ }
+
+ EXPECT_EQ(data.size(), buffer.size());
+ ASSERT_STREQ(data.c_str(), buffer.str().c_str());
+}
+
+TEST(StringOutputBuffer, AppendWithPageSizeMultiples) {
+ const size_t page_size = StringOutputBuffer::GetPageSizeForTesting();
+ const size_t page_count = 100;
+ const size_t data_size = page_size * page_count;
+ std::string data = CreateTestString(data_size);
+
+ StringOutputBuffer buffer;
+
+ for (size_t n = 0; n < page_count; ++n) {
+ size_t start_offset = n * page_size;
+ buffer.Append(&data[start_offset], page_size);
+ }
+
+ EXPECT_EQ(data.size(), buffer.size());
+ ASSERT_STREQ(data.c_str(), buffer.str().c_str());
+}
+
+TEST(StringOutput, WrappedByStdOstream) {
+ const size_t data_size = 100000;
+ std::string data = CreateTestString(data_size);
+
+ const size_t num_spans = 50;
+ const size_t span_size = data_size / num_spans;
+
+ StringOutputBuffer buffer;
+ std::ostream out(&buffer);
+
+ for (size_t n = 0; n < num_spans; ++n) {
+ size_t start_offset = n * span_size;
+ size_t end_offset = std::min(start_offset + span_size, data.size());
+ out << std::string_view(&data[start_offset], end_offset - start_offset);
+ }
+
+ EXPECT_EQ(data.size(), buffer.size());
+ ASSERT_STREQ(data.c_str(), buffer.str().c_str());
+}
+
+TEST(StringOutputBuffer, ContentsEqual) {
+ base::ScopedTempDir temp_dir;
+ ASSERT_TRUE(temp_dir.CreateUniqueTempDir());
+
+ const size_t data_size = 100000;
+ std::string data = CreateTestString(data_size);
+
+ base::FilePath file_path = temp_dir.GetPath().AppendASCII("foo.txt");
+ base::WriteFile(file_path, data.c_str(), static_cast<int>(data.size()));
+
+ {
+ StringOutputBuffer buffer;
+ buffer.Append(data);
+
+ EXPECT_TRUE(buffer.ContentsEqual(file_path));
+
+ // Different length and contents.
+ buffer << "extra";
+ EXPECT_FALSE(buffer.ContentsEqual(file_path));
+ }
+
+ // The same length, different contents.
+ {
+ StringOutputBuffer buffer;
+ buffer << CreateTestString(data_size, 1);
+
+ EXPECT_FALSE(buffer.ContentsEqual(file_path));
+ }
+}
+
+TEST(StringOutputBuffer, WriteToFile) {
+ base::ScopedTempDir temp_dir;
+ ASSERT_TRUE(temp_dir.CreateUniqueTempDir());
+
+ const size_t data_size = 100000;
+ std::string data = CreateTestString(data_size);
+
+ // Write if file doesn't exist. Also create directory.
+ base::FilePath file_path =
+ temp_dir.GetPath().AppendASCII("bar").AppendASCII("foo.txt");
+
+ StringOutputBuffer buffer;
+ buffer.Append(data);
+
+ EXPECT_TRUE(buffer.WriteToFile(file_path, nullptr));
+
+ // Verify file was created and has same content.
+ base::File::Info file_info;
+ ASSERT_TRUE(base::GetFileInfo(file_path, &file_info));
+ ASSERT_TRUE(buffer.ContentsEqual(file_path));
+}